#!/usr/bin/perl

#   Vpl2vpl: a program to generate accented virtual fonts for TeX
#   Copyright (C) 1997 John D. Smith

#   This program is free software; you can redistribute it and/or modify
#   it under the terms of the GNU General Public License as published by
#   the Free Software Foundation; either version 2 of the License, or
#   (at your option) any later version.

#   This program is distributed in the hope that it will be useful,
#   but WITHOUT ANY WARRANTY; without even the implied warranty of
#   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
#   GNU General Public License for more details.

#   You should have received a copy of the GNU General Public License
#   along with this program; if not, write to the Free Software
#   Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.

$version = 0.25;
#------------------------------------------------------------------------#
$description =
"Syntax: vpl2vpl -d definition-file [-s shrink-factor]
           [-c candrabindu-adjustment] [-b] vpl-file

Vpl2vpl creates new TeX virtual fonts based on existing fonts or
virtual fonts (\"input fonts\"). A successful run will read a pl
(Property List) or vpl (Virtual Property List) file and a definition
file, and will generate a new vpl (Virtual Property List) file on
standard output. The input font is assumed to adhere to the standard
TeX encoding for text fonts unless it was created with either of the
programs afm2pl or afm2tfm, in which case it is assumed to conform to
(respectively) the Adobe Standard Encoding or the encoding specified
in the file dvips.enc. In either case, the name of the input font is
assumed to be the name of the input file without its .vpl or .pl
extension: it must conform to normal TeX conventions for naming fonts,
as vpl2vpl attempts to draw conclusions from it about the kind of font
it is dealing with.

A typical complete sequence of commands to create a new virtual
font might therefore be
     tftopl cmr10.tfm cmr10.pl
     vpl2vpl -d ISO-Latin1.def cmr10.pl >cmr10_isol1.vpl
     vptovf cmr10_isol1.vpl cmr10_isol1.vf cmr10_isol1.tfm
for a Computer Modern font, or
     afm2pl Times-Roman.afm rptmr.pl
     pltotf rptmr.pl rptmr.tfm
     vpl2vpl -d ISO-Latin1.def rptmr.pl >ptmr-isol1.vpl
     vptovf ptmr-isol1.vpl ptmr-isol1.vf ptmr-isol1.tfm
for a PostScript font.

Another approach for a PostScript font is to use afm2tfm:
     afm2tfm Times-Roman.afm -t dvips.enc -v ptmr rptmr
     vpl2vpl -d ISO-Latin1.def ptmr.vpl >ptmr-isol1.vpl
     vptovf ptmr-isol1.vpl ptmr-isol1.vf ptmr-isol1.tfm
-- but this is now deprecated, as afm2tfm generates incorrect
values for the heights of some characters, and this can lead to
bad accent placing.

In order to keep the whole upper half of the character set free for
the requirements of the encoding specified in the definition file,
certain modifications are made to input fonts following the
dvips.enc encoding to bring them into greater conformity with the
TeX norm. In particular, the characters dotaccent and hungarumlaut
are placed in the positions assigned by TeX (\"5F, \"7D), not those
enforced by dvips.enc (\"C7, \"CD). The f-ligatures, double quotes
and dashes are also moved from the upper half of the character set
to their normal TeX positions. As a result, the following characters
are not found in the lower half of the character set: quotesingle,
quotedbl, backslash, underscore, braceleft, bar, braceright. These
characters can, however, be assigned positions in the output font if
they are needed. (Indeed, they could all be explicitly restored to
their dvips.enc positions if this were desired.)

Options:

  -d should refer to a font definition file. This file (which could
     usefully be named, e.g., \"French.def\") should consist of
     lines of character definitions, in the form
               \"number\"   \"character\"
     or
               \"number\"   \"character\"   \"accent\"
     Here \"number\" represents the character's position in the new
     encoding and may be expressed in decimal, octal or hex;
     \"character\" names the character (e.g. \"comma\", \"eight\",
     \"A\") or consists of the word \".notdef\" (indicating that
     the specified number's \"slot\" in the new encoding is to be
     empty); and \"accent\" optionally names an accent to be placed
     on the character. In addition to the standard accents available
     in PostScript fonts, \"underbar\" and \"underdot\" are also
     available, as are \"under\" versions of all the normal
     superscript accents (\"underdieresis\", \"underring\", etc.).
     The Indian accent \"candrabindu\" may also be specified: it
     is formed by overprinting a breve with a dotaccent. Finally,
     \"overdot\" may be used as a synonym for \"dotaccent\".

     If the character named in the \"accent\" position is not in fact
     a valid accent character, the program interprets the definition
     as a request for a digraph formed from the \"character\" and the
     \"accent\". A digraph consisting of, say, \"k\" and \"h\" will be
     indistinguishable from the letters \"k\" and \"h\" printed
     consecutively, but the digraph \"kh\" can itself receive accents
     like any other character: see next paragraph.

     A new character (such as \"amacron\" or \"kh\") may be freely
     used in the \"character\" position of a further definition (such
     as \"amacron breve\" or \"kh underbar\"). There is no constraint
     on the ordering of definitions within a definition file. The
     definition of \"a macron\" does not have to precede that of
     \"amacron breve\": requests for \"impossible\" characters are
     deferred until their constituents have had a chance to come into
     being.

     \"Slots\" for which no new definition is given retain the
     definition they have in the input font.

     The definition file may also contain blank lines and comments
     (introduced by \"\#\").

  -s may optionally give the factor, expressed as a per-thousand
     value, by which normally superscript accents (such as dieresis,
     ring) should be shrunk when they are used as subscript accents
     (such as underdieresis, underring). Values of around 800 may be
     found useful.

  -c may optionally give two comma-separated numerical values to
     adjust the x and y coordinates of the dotaccent placed within a
     breve to form the candrabindu accent. A coordinate scheme using
     \"DESIGNUNITS R 1000\" is assumed.

  -b may optionally be specified to block the use of predefined
     accented characters, forcing vpl2vpl to define its own
     versions. This may be useful to secure a consistent appearance
     in cases where a font designer does not share vpl2vpl's views
     on where accents should be placed.

  -h prints this help.
";
#------------------------------------------------------------------------#

########################
# Packages and constants
########################
#
use File::Basename;
use Getopt::Std;
$cmdline = basename($0) . " " . join " ", @ARGV;
getopts('d:s:c:bh');
if  ($opt_h or !$opt_d or $#ARGV != 0) {
   print STDERR $description;
   exit 1;
}

$filename = $ARGV[0];
($fontname = $filename) =~ s/\..*$//;
($encname = basename($opt_d)) =~ s/\..*$//;
$vtitle = "(VTITLE Font $fontname modified for $encname encoding by vpl2vpl";
$vtitle .= " v. $version" if $version;
$vtitle .= ")\n(COMMENT Command line: $cmdline)";

#
# Flags for bold and small caps. These are probably a bit iffy, but
# there's not much that can be done about it.
#
if ($fontname =~ /(^p.*b[oi]?[c]?$|^[^p].*bx[a-z]*[0-9]+$)/) { $bold = 1 }
if ($fontname =~ /(^p.*c$|^[^p].*csc[a-z]*[0-9]+$)/)         { $scaps = 1 }
if ($opt_s) { $shrink = $opt_s / 1000 } else { $shrink = 1 }

#
# Array to convert from number to vpl representation
#
foreach $i (0 .. 255) {
   $nv[$i] = (chr($i) =~ /[0-9A-Za-z]/ ? "C " . chr $i : sprintf("O %lo", $i));
}

#
# Now the encoding vectors.
#
@TeXenc = (
   "Gamma",          "Delta",          "Theta",          "Lambda",
   "Xi",             "Pi",             "Sigma",          "Upsilon",
   "Phi",            "Psi",            "Omega",          "ff",
   "fi",             "fl",             "ffi",            "ffl",
   "dotlessi",       "dotlessj",       "grave",          "acute",
   "caron",          "breve",          "macron",         "ring",
   "cedilla",        "germandbls",     "ae",             "oe",
   "oslash",         "AE",             "OE",             "Oslash",
   "space",          "exclam",         "quotedblright",  "numbersign",
   "dollar",         "percent",        "ampersand",      "quoteright",
   "parenleft",      "parenright",     "asterisk",       "plus",
   "comma",          "hyphen",         "period",         "slash",
   "zero",           "one",            "two",            "three",
   "four",           "five",           "six",            "seven",
   "eight",          "nine",           "colon",          "semicolon",
   "exclamdown",     "equal",          "questiondown",   "question",
   "at",             "A",              "B",              "C",
   "D",              "E",              "F",              "G",
   "H",              "I",              "J",              "K",
   "L",              "M",              "N",              "O",
   "P",              "Q",              "R",              "S",
   "T",              "U",              "V",              "W",
   "X",              "Y",              "Z",              "bracketleft",
   "quotedblleft",   "bracketright",   "circumflex",     "dotaccent",
   "quoteleft",      "a",              "b",              "c",
   "d",              "e",              "f",              "g",
   "h",              "i",              "j",              "k",
   "l",              "m",              "n",              "o",
   "p",              "q",              "r",              "s",
   "t",              "u",              "v",              "w",
   "x",              "y",              "z",              "endash",
   "emdash",         "hungarumlaut",   "tilde",          "dieresis"
);

@dvipsenc = (
   ".notdef",        ".notdef",        ".notdef",        ".notdef",
   ".notdef",        ".notdef",        ".notdef",        ".notdef",
   ".notdef",        ".notdef",        ".notdef",        ".notdef",
   ".notdef",        "quotesingle",    "exclamdown",     "questiondown",
   "dotlessi",       "dotlessj",       "grave",          "acute",
   "caron",          "breve",          "macron",         "ring",
   "cedilla",        "germandbls",     "ae",             "oe",
   "oslash",         "AE",             "OE",             "Oslash",
   "space",          "exclam",         "quotedbl",       "numbersign",
   "dollar",         "percent",        "ampersand",      "quoteright",
   "parenleft",      "parenright",     "asterisk",       "plus",
   "comma",          "hyphen",         "period",         "slash",
   "zero",           "one",            "two",            "three",
   "four",           "five",           "six",            "seven",
   "eight",          "nine",           "colon",          "semicolon",
   "less",           "equal",          "greater",        "question",
   "at",             "A",              "B",              "C",
   "D",              "E",              "F",              "G",
   "H",              "I",              "J",              "K",
   "L",              "M",              "N",              "O",
   "P",              "Q",              "R",              "S",
   "T",              "U",              "V",              "W",
   "X",              "Y",              "Z",              "bracketleft",
   "backslash",      "bracketright",   "circumflex",     "underscore",
   "quoteleft",      "a",              "b",              "c",
   "d",              "e",              "f",              "g",
   "h",              "i",              "j",              "k",
   "l",              "m",              "n",              "o",
   "p",              "q",              "r",              "s",
   "t",              "u",              "v",              "w",
   "x",              "y",              "z",              "braceleft",
   "bar",            "braceright",     "tilde",          "dieresis",
   "asciicircum",    "asciitilde",     "Ccedilla",       "Iacute",
   "Icircumflex",    "atilde",         "edieresis",      "egrave",
   "scaron",         "zcaron",         "Eth",            "ff",
   "ffi",            "ffl",            ".notdef",        ".notdef",
   ".notdef",        ".notdef",        "Scaron",         ".notdef",
   ".notdef",        ".notdef",        ".notdef",        ".notdef",
   "Ydieresis",      ".notdef",        "Zcaron",         ".notdef",
   ".notdef",        ".notdef",        ".notdef",        ".notdef",
   ".notdef",        ".notdef",        "cent",           "sterling",
   "fraction",       "yen",            "florin",         "section",
   "currency",       "copyright",      "quotedblleft",   "guillemotleft",
   "guilsinglleft",  "guilsinglright", "fi",             "fl",
   "degree",         "endash",         "dagger",         "daggerdbl",
   "periodcentered", ".notdef",        "paragraph",      "bullet",
   "quotesinglbase", "quotedblbase",   "quotedblright",  "guillemotright",
   "ellipsis",       "perthousand",    ".notdef",        ".notdef",
   "Agrave",         "Aacute",         "Acircumflex",    "Atilde",
   "Adieresis",      "Aring",          ".notdef",        "dotaccent",
   "Egrave",         "Eacute",         "Ecircumflex",    "Edieresis",
   "Igrave",         "hungarumlaut",   "ogonek",         "Idieresis",
   "emdash",         "Ntilde",         "Ograve",         "Oacute",
   "Ocircumflex",    "Otilde",         "Odieresis",      ".notdef",
   ".notdef",        "Ugrave",         "Uacute",         "Ucircumflex",
   "Udieresis",      "Yacute",         "Thorn",          ".notdef",
   "agrave",         "aacute",         "acircumflex",    "ordfeminine",
   "adieresis",      "aring",          ".notdef",        "ccedilla",
   "Lslash",         "eacute",         "ecircumflex",    "ordmasculine",
   "igrave",         "iacute",         "icircumflex",    "idieresis",
   ".notdef",        "ntilde",         "ograve",         "oacute",
   "ocircumflex",    "otilde",         "odieresis",      ".notdef",
   "lslash",         "ugrave",         "uacute",         "ucircumflex",
   "udieresis",      "yacute",         "thorn",          "ydieresis"
);

@adobeenc=(
   ".notdef",        ".notdef",        ".notdef",        ".notdef",
   ".notdef",        ".notdef",        ".notdef",        ".notdef",
   ".notdef",        ".notdef",        ".notdef",        ".notdef",
   ".notdef",        ".notdef",        ".notdef",        ".notdef",
   ".notdef",        ".notdef",        ".notdef",        ".notdef",
   ".notdef",        ".notdef",        ".notdef",        ".notdef",
   ".notdef",        ".notdef",        ".notdef",        ".notdef",
   ".notdef",        ".notdef",        ".notdef",        ".notdef",
   "space",          "exclam",         "quotedbl",       "numbersign",
   "dollar",         "percent",        "ampersand",      "quoteright",
   "parenleft",      "parenright",     "asterisk",       "plus",
   "comma",          "hyphen",         "period",         "slash",
   "zero",           "one",            "two",            "three",
   "four",           "five",           "six",            "seven",
   "eight",          "nine",           "colon",          "semicolon",
   "less",           "equal",          "greater",        "question",
   "at",             "A",              "B",              "C",
   "D",              "E",              "F",              "G",
   "H",              "I",              "J",              "K",
   "L",              "M",              "N",              "O",
   "P",              "Q",              "R",              "S",
   "T",              "U",              "V",              "W",
   "X",              "Y",              "Z",              "bracketleft",
   "backslash",      "bracketright",   "asciicircum",    "underscore",
   "quoteleft",      "a",              "b",              "c",
   "d",              "e",              "f",              "g",
   "h",              "i",              "j",              "k",
   "l",              "m",              "n",              "o",
   "p",              "q",              "r",              "s",
   "t",              "u",              "v",              "w",
   "x",              "y",              "z",              "braceleft",
   "bar",            "braceright",     "asciitilde",     ".notdef",
   ".notdef",        ".notdef",        ".notdef",        ".notdef",
   ".notdef",        ".notdef",        ".notdef",        ".notdef",
   ".notdef",        ".notdef",        ".notdef",        ".notdef",
   ".notdef",        ".notdef",        ".notdef",        ".notdef",
   ".notdef",        ".notdef",        ".notdef",        ".notdef",
   ".notdef",        ".notdef",        ".notdef",        ".notdef",
   ".notdef",        ".notdef",        ".notdef",        ".notdef",
   ".notdef",        ".notdef",        ".notdef",        ".notdef",
   ".notdef",        "exclamdown",     "cent",           "sterling",
   "fraction",       "yen",            "florin",         "section",
   "currency",       "quotesingle",    "quotedblleft",   "guillemotleft",
   "guilsinglleft",  "guilsinglright", "fi",             "fl",
   ".notdef",        "endash",         "dagger",         "daggerdbl",
   "periodcentered", ".notdef",        "paragraph",      "bullet",
   "quotesinglbase", "quotedblbase",   "quotedblright",  "guillemotright",
   "ellipsis",       "perthousand",    ".notdef",        "questiondown",
   ".notdef",        "grave",          "acute",          "circumflex",
   "tilde",          "macron",         "breve",          "dotaccent",
   "dieresis",        ".notdef",       "ring",           "cedilla",
   ".notdef",        "hungarumlaut",   "ogonek",         "caron",
   "emdash",         ".notdef",        ".notdef",        ".notdef",
   ".notdef",        ".notdef",        ".notdef",        ".notdef",
   ".notdef",        ".notdef",        ".notdef",        ".notdef",
   ".notdef",        ".notdef",        ".notdef",        ".notdef",
   ".notdef",        "AE",             ".notdef",        "ordfeminine",
   ".notdef",        ".notdef",        ".notdef",        ".notdef",
   "Lslash",         "Oslash",         "OE",             "ordmasculine",
   ".notdef",        ".notdef",        ".notdef",        ".notdef",
   ".notdef",        "ae",             ".notdef",        ".notdef",
   ".notdef",        "dotlessi",       ".notdef",        ".notdef",
   "lslash",         "oslash",         "oe",             "germandbls",
   ".notdef",        ".notdef",        ".notdef",        ".notdef"
);

###############
# Read DEF file
###############
#
open DEF, $opt_d or die "Cannot open $opt_d: $!\n";
while (<DEF>) {
   next if (/^\s*$/ || /^\#/);
   s/\s*(\#.*)?$//;
   push @deflines, $_;
}
close DEF;

###############
# Read VPL file
###############
#
# File header
#
$vplhead = <> or exit 1;
unless ($vplhead =~ /^\((VTITLE|FAMILY) /) {
   die "$filename is not a vpl file: giving up\n"
}
do {
   $_ = <>;
   $vplhead .= $_;
} until ($_ =~ /^\(LIGTABLE$/ or eof);
if (eof) {
   die "$filename does not seem to be a text font (no LIGTABLE): giving up\n";
}
if ($vplhead =~ s/\A\(VTITLE(.*)$/$vtitle\n(COMMENT Old vtitle:$1/m) {
   $vplhead =~ s/\n\(COMMENT Please edit that VTITLE .*\)$//m;
   @enc = @dvipsenc;
   $dvips = 1;
}
elsif ($vplhead =~ /\A\(FAMILY.*\)\n\(CODINGSCHEME ADOBESTANDARDENCODING\)$/m) {
   $vplhead =~ s/\A/$vtitle\n/m;
   @enc = @adobeenc;
   $dvips = 0;
}
else {
   $vplhead =~ s/\A/$vtitle\n/m;
   @enc = @TeXenc;
   $dvips = 0;
}
if ($vplhead =~ /^\(CODINGSCHEME TEX MATH SYMBOLS/m) {
   die "$filename is a TeX math font: giving up\n";
}
unless ($vplhead =~ s/^(\(CODINGSCHEME .*\+\s?)(\S+)\)$/$1$encname)/m) {
   $vplhead =~ s/^(\(CODINGSCHEME .*)\)$/$1 + $encname)/m;
}
if ($vplhead =~ /^\(DESIGNUNITS R (.+)\)/m)    { $scale = $1 }
else {$scale = 1 }
if ($vplhead =~ /^   \(SLANT R (.+)\)/m)       { $slant = $1 }
if ($vplhead =~ /^   \(XHEIGHT [DR] (.+)\)/m)  { $xheight = $1 }
unless ($vplhead =~ /^\(MAPFONT /m) {
   if ($vplhead =~ /^\(DESIGNSIZE R (.*)\)$/m) { $dsize = $1  }
   $mapfont = "\n(MAPFONT D 0\n   (FONTNAME $fontname)\n   (FONTDSIZE R $dsize)\n   )";
   $vplhead =~ s/\n\(LIGTABLE\Z/$mapfont$&/m;
}
if ($opt_s) {
   $vplhead =~ s[^(\(MAPFONT D )0(.*?)(   \))]
                [$&\n${1}1$2   (FONTAT R ${ \($shrink * $scale) })\n$3]ms;
}

#
# Ligatures and kerns
#
do {
   $_ = <>;
   s/ \(comment .*$//i;
   $ligs .= $_;
} until $_ =~ /^   \)/;
#
# Now build a hash to convert from vpl representation to char name
# and use it to make ligtable readable
#
foreach $i (0 .. 255) { $vc{$nv[$i]} = $enc[$i] }
$ligs =~ s/^(   \((?:LABEL|KRN) )(\S+ \S+)(.*\))$/$1$vc{$2}$3/gm;
$ligs =~ s/^(   \(LIG )(\S+ \S+) (\S+ \S+)\)$/$1$vc{$2} $vc{$3})/gm;

#
# Character definitions: store "encoded" defs in @chars, store *all*
# defs in %allchars
#
$_ = <>;
do {
   if (/^\(CHARACTER/) {
      $character = $_;
      do {
	 $_ = <>;
	 $character .= $_;
      } until $_ =~ /^   \)/;
      storeinfo($character);
      $_ = <>;
   }
} until eof;
foreach $i (0 .. $#chars) {
   if ($chars[$i] and $enc[$i] ne ".notdef") {
      $allchars{$enc[$i]} = $chars[$i];
   }
}

##################
# Set up constants
##################

$subacc = "(cedilla|ogonek|commaaccent)";
$supacc = "(grave|acute|circumflex|tilde|macron|breve|dotaccent|overdot|dieresis|ring|hungarumlaut|caron|candrabindu)";
$underacc = "(underdot|under$supacc)";
$accents = "($subacc|$supacc|$underacc|underbar)";
$underadp = 0.230;				# depth of "under" accs
$underddp = 0.213;				# depth of underdot
if ($bold) { $thk = 0.072 } else { $thk = 0.052 }	# thickness and
$underbdp = 0.082 + $thk;				# depth of underbar
$capheight = $allchars{"X"}{ht};
$accheight = $allchars{"macron"}{ht};
$accdepth = $accheight - $thk * $scale;	# probable approx. "depth" of macron
$v1 = $accheight - $xheight;		# vertical offset for double accents
$v2 = $capheight - $xheight;		# vertical offset for accented caps etc
if ($scaps)  {						# accented small caps
    $scoffset = $allchars{"x"}{ht} - $xheight;
    $v1 += $scoffset;
}
if ($opt_c) {						# candrabindu
   ($cbx, $cby) = $opt_c =~ /^(.*),(.*)$/;
   $cbx += ($allchars{"breve"}{wd} - $allchars{"dotaccent"}{wd}) / 2;
   $cbx /= 1000;
   $cby /= 1000;
}

######################
# Build the characters
######################
#
# First normalise dvips.enc encoding quirks
#
if ($dvips) {
   chmove("fi", 014);
   chmove("fl", 015);
   chmove("quotedblright", 042);
   chmove("quotedblleft", 0134);
   chmove("dotaccent", 0137);
   chmove("endash", 0173);
   chmove("emdash", 0174);
   chmove("hungarumlaut", 0175);
}
#
# Now build a list of definitions supplied by user
#
for (@deflines) {
   if (/^\s*(\d+|0[0-7]+|0x[0-9a-fA-F]+)\s+([a-zA-Z]+?|\.notdef)(?:\s+([a-zA-Z]+))?$/) {
      ($num, $char, $acc)  = ($1, $2, $3);
      $num = oct $num if $num =~ /^0/;
      if ($num > 255) { die "Bad definition (number out of range): $_\n" }
      $def = {};
      $def->{qdef}  = $_;
      $def->{num}   = $num;
      $def->{char}  = $char;
      $def->{acc}   = $acc;
      $def->{nchar} = $char . $acc;
      push @nchars, $def->{nchar};
      push @defs,   $def;
   }
   else { die "Bad definition: $_\n" }
}
#
# Work through the list
#
while (@defs) {
   $def  = shift @defs;
   $qdef  = $def->{qdef};
   $num   = $def->{num};
   $char  = $def->{char};
   $acc   = $def->{acc};
   $nchar = $def->{nchar};
   #
   # If we can't handle $char/$acc yet, but believe we will be able
   # to later, send the definition to the back of the queue. In case
   # it later turns out we were wrong, allow only five loops before
   # giving up.
   #
   if (!$allchars{$char} and $char ne ".notdef") {
      if (grep /^$char$/, @nchars) {
	 unless (++$def->{requeue} > 5) {
	    push @defs, $def;
	    next;
	 }
      }
      else { die "Bad definition (no such character): $qdef\n" }
   }
   if ($acc and !$allchars{$acc} and $acc !~ /^$accents$/) {
      if (grep /^$acc$/, @nchars) {
	 unless (++$def->{requeue} > 5) {
	    push @defs, $def;
	    next;
	 }
      }
      else { die "Bad definition (no such accent): $qdef\n" }
   }
   #
   # Remove any existing claims on $num
   #
   @{ $allchars{$chars[$num]{id}}{num} } = grep !/$num/,
     @{ $allchars{$chars[$num]{id}}{num} };
   #
   # First deal with .notdef
   #
   if ($nchar eq ".notdef") {
      undef $chars[$num];
   }
   #
   # Next look among existing chars (unless blocked by -b)
   #
   elsif (!($acc and $opt_b) and $allchars{$nchar}) {
      push( @{ $allchars{$nchar}{num} }, $num);
      $chars[$num] = $allchars{$nchar};
   }
   #
   # If it can't be built from sub-elements, issue a warning and move on
   #
   elsif (!$acc) {
      warn "No such character - ignoring definition: $qdef\n";
      undef $chars[$num];
   }
   #
   # Now build the char
   #
   else {
      #
      # First get rid of predefined/duplicated ligtable statements
      # and character definitions; also synonyms
      #
      $ligs =~ s/\n   \((LABEL|KRN|LIG) ($nchar .*|.*$nchar)\)$//gm;
      $allchars{$nchar} = ();
      if ($acc eq "overdot") {
         $nchar2 = $char . "dotaccent";
	 $ligs =~ s/\n   \((LABEL|KRN|LIG) ($nchar2 .*|.*$nchar2)\)$//gm;
	 delete $allchars{$nchar2};
      }
      #
      # Go!
      #
      if ($acc =~ /^$subacc$/) {
	 subacc($num, $char, $acc, $nchar);
	 fixkerns($char, $acc);
      }
      elsif ($acc =~ /^$supacc$/) {
	 supacc($num, $char, $acc, $nchar);
	 fixkerns($char, $acc);
      }
      elsif ($acc =~ /^$underacc$/) {
	 underacc($num, $char, $acc, $nchar);
	 fixkerns($char, $acc);
      }
      elsif ($acc =~ /^underbar$/) {
	 underb($num, $char, $nchar);
	 fixkerns($char, $acc);
      }
      else {
	 digraph($num, $char, $acc, $nchar);
	 fixkerns($char, $acc);
      }
   }
}

###################
# Sort out ligtable
###################
#
# Convert to vpl representation, eliminating statements invoking
# "unencoded" characters
#
@liglist = split /\n/, $ligs;
$ligs = "";
foreach (@liglist) {
   if (/^(   \(LIG \S+ )(\S+)\)$/) {
      if ($n = ${ $allchars{$2}{num} }[0]) {
	 s/^(   \(LIG \S+ )(\S+)\)$/$1$nv[$n])/;
      }
      else { next }
   }
   if (/^(   \((?:LABEL|LIG|KRN) )([^ )]+)(.*)$/) {
      ($one, $two, $three) = ($1, $2, $3);
      foreach $n (@{ $allchars{$two}{num} }) {
	 $ligs .= "$one$nv[$n]$three\n";
      }
   }
   else { $ligs .= "$_\n" }
}
#
# Eliminate sequences orphaned by elimination of a LABEL
#
@liglist = split /   \(STOP\)\n/, $ligs;
$ligs = "";
foreach (@liglist) {
   if (/^   \(LABEL /m) { $ligs .= "$_   (STOP)\n" }
   elsif (/^   \)$/m) { $ligs .= $_ }
}
#
# Eliminate empty statements
#
$ligs =~ s/(^   \(LABEL .*\)\n)+   \(STOP\)\n//gm;

####################
# Output the results
####################
#
print $vplhead, $ligs;
foreach $i (0 .. 255) { if (defined $chars[$i]{id}) { printchar($i) } }

#####################
# End of main program
#####################

sub storeinfo {
   #
   # Extract info from a character definition and store it in @chars
   #
   my $char = shift;
   my $num;
   if ($char =~ /\A\(CHARACTER O ([0-7]+)/m) { $num = oct $1 }
   elsif ($char =~ /\A\(CHARACTER C (.)/m) { $num = ord $1 }
   $chars[$num]{id} = $enc[$num];
   push( @{ $chars[$num]{num} }, $num);
   if ($char =~ /^   \(CHARWD R (.*?)\)$/m) { $chars[$num]{wd} = $1 }
   if ($char =~ /^   \(CHARHT R (.*?)\)$/m) { $chars[$num]{ht} = $1 }
   if ($char =~ /^   \(CHARDP R (.*?)\)$/m) { $chars[$num]{dp} = $1 }
   if ($char =~ /^   \(CHARIC R (.*?)\)$/m) { $chars[$num]{ic} = $1 }
   if ($char =~ /^   \(MAP\n((.|\n)*)^      \)/m) { $chars[$num]{map} = $1 }
   else { $chars[$num]{map} = "      (SETCHAR $nv[$num])\n" }
}

sub printchar {
   #
   # Extract info from @chars and build it into a character definition
   #
   my $num = shift;
   print "(CHARACTER ";
   if (chr($num) =~ /[0-9A-Za-z]/) { print "C " . chr($num) }
   else {
      printf "O %lo", $num;
      print " (COMMENT " . $chars[$num]{id} . ")";
   }
   print "\n";
   print "   (CHARWD R " . $chars[$num]{wd} . ")\n" if $chars[$num]{wd};
   print "   (CHARHT R " . $chars[$num]{ht} . ")\n" if $chars[$num]{ht};
   print "   (CHARDP R " . $chars[$num]{dp} . ")\n" if $chars[$num]{dp};
   print "   (CHARIC R " . $chars[$num]{ic} . ")\n" if $chars[$num]{ic};
   print "   (MAP\n";
   print $chars[$num]{map};
   print "      )\n";
   print "   )\n";
}

sub chmove {
   #
   # Move a character
   #
   my ($char, $num) = @_;
   my $i;
   foreach $i (@{ $allchars{$char}{num} }) { undef $chars[$i] }
   @{ $allchars{$char}{num} } = ();
   @{ $allchars{$chars[$num]{id}}{num} } = grep !/$num/,
     @{ $allchars{$chars[$num]{id}}{num} };
   push( @{ $allchars{$char}{num} }, $num);
   $chars[$num] = $allchars{$char};
}

sub max {
   #
   # Return greater of two values
   #
   my ($a, $b) = @_;
   return $a > $b ? $a : $b;
}

sub subacc {
   #
   # Subscript accents
   #
   my ($num, $char, $acc, $id) = @_;
   my ($h, $s1, $s2, $s3);
   $allchars{$id}{wd} = $allchars{$char}{wd};
   $allchars{$id}{ht} = $allchars{$char}{ht};
   $allchars{$id}{dp} = $allchars{$acc}{dp};
   $allchars{$id}{ic} = $allchars{$char}{ic};
   $allchars{$id}{id} = $id;
   push( @{ $allchars{$id}{num} }, $num);
   $s1 = $allchars{$char}{map};
   $s1 =~ s/\A      (.*)\n\Z/(PUSH) $1 (POP)/s;
   $h = sprintf("%.3f", ($allchars{$char}{wd} - $allchars{$acc}{wd}) / 2);
   if ($h > 0) { $s2 = "      (MOVERIGHT R $h) " }
   elsif ($h < 0) {
      $h = -$h;
      $s2 = "      (MOVELEFT R $h) ";
   }
   else { $s2 = "      " }
   $s3 = $allchars{$acc}{map};
   $s3 =~ s/\A.*(\(SETCHAR . .+?\)).*\Z/$1/s;
   $allchars{$id}{map} = "      $s1\n$s2$s3\n";
   $chars[$num] = $allchars{$id};
}

sub supacc {
   #
   # Superscript accents
   #
   my ($num, $char, $acc, $id) = @_;
   my ($cb, $h, $hadj, $tallchar, $ic, $s1, $s2, $s3);
   if ($char eq "i" and $allchars{"dotlessi"}) { $char = "dotlessi" }
   if ($char eq "j" and $allchars{"dotlessj"}) { $char = "dotlessj" }
   if ($acc eq "overdot") { $acc = "dotaccent" }
   if ($acc eq "candrabindu") {
      $acc = "breve";
      ($cb = $allchars{"dotaccent"}{map}) =~ s/\A.*(\(SETCHAR . .+?\)).*\Z/$1/s;
   }
   $allchars{$id}{wd} = $allchars{$char}{wd};
   $allchars{$id}{ht} = $allchars{$acc}{ht};
   $allchars{$id}{dp} = $allchars{$char}{dp};
   $allchars{$id}{id} = $id;
   push( @{ $allchars{$id}{num} }, $num);
   $s1 = $allchars{$char}{map};
   $s1 =~ s/\A      (.*)\n\Z/(PUSH) $1 (POP)/s;
   if ($scaps and $char =~ /^[a-z]/) {			# accented small caps
      $tallchar = 1;
      if ($char =~ /$supacc$/
	   and $char !~ /under$supacc$/) {		# double accs
	 $s2 = "      (MOVEUP R $v1)";
	 $allchars{$id}{ht} += $v1;
	 $hadj = sprintf("%.3f", $v1 * $slant);
	 $ic = $allchars{$char}{ic};
      }
      else {						# single accs
	 $s2 = "      (MOVEUP R $scoffset)";
	 $allchars{$id}{ht} += $scoffset;
	 $hadj = sprintf("%.3f", $scoffset * $slant);
	 $ic = $allchars{$acc}{ic} + $hadj;
      }
   }
   elsif ($allchars{$char}{ht} >= ($accheight + $v2)) {	# double accs
      $tallchar = 1;					# on caps etc.
      $s2 = "      (MOVEUP R ${ \($v1 + $v2) })";
      $allchars{$id}{ht} += ($v1 + $v2);
      $hadj = sprintf("%.3f", ($v1 + $v2) * $slant);
      $ic = $allchars{$char}{ic};
   }
   elsif ($allchars{$char}{ht} > 1.15 * $xheight) {
      $tallchar = 1;
      if ($char =~ /$supacc$/
	   and $char !~ /under$supacc$/) {		# double accs
	 $s2 = "      (MOVEUP R $v1)";
	 $allchars{$id}{ht} += $v1;
	 $hadj = sprintf("%.3f", $v1 * $slant);
	 $ic = $allchars{$char}{ic};
      }
      else {						# caps etc.
	 $s2 = "      (MOVEUP R $v2)";
	 $allchars{$id}{ht} += $v2;
	 $hadj = sprintf("%.3f", $v2 * $slant);
	 $ic = $allchars{$char}{ic};
      }
   }
   else {						# single accs
      $s2 = "     ";
      $ic = $allchars{$acc}{ic};
   }
   $h = sprintf("%.3f", ($allchars{$char}{wd} - $allchars{$acc}{wd}) / 2);
   unless ($tallchar) { $ic -= $h }
   $allchars{$id}{ic} = $ic unless $ic < 0;
   $h += $hadj;
   if ($h > 0) { $s2 .= " (MOVERIGHT R $h) " }
   elsif ($h < 0)  {
      $h = -$h;
      $s2 .= " (MOVELEFT R $h) ";
   }
   else { $s2 .= " " }
   $s3 = $allchars{$acc}{map};
   $s3 =~ s/\A.*(\(SETCHAR . .+?\)).*\Z/$1/s;
   if ($cb) {						# candrabindu
      $cb = $s2 . $cb;
      if ($cbx) {
	 unless (($cb =~ s/(MOVERIGHT R )([0-9.]+)/$1 . ($2 + $cbx * $scale)/e)
	   or ($cb =~ s/(MOVELEFT R )([0-9.]+)/$1 . ($2 - $cbx * $scale)/e)) {
	      $cb =~ s/^( +)/"$1(MOVERIGHT R " . ($cbx * $scale) . ") "/e;
	   }
      }
      if ($cby) {
	 unless ($cb =~ s/(MOVEUP R )([0-9.]+)/$1 . ($2 + $cby * $scale)/e) {
	    $cb =~ s/^( +)/"$1(MOVEUP R " . ($cby * $scale) . ") "/e;
	 }
      }
      $cb .= "\n";
      $s1 = "(PUSH) " . $s1;
      $s3 .= " (POP)";
   }
   $allchars{$id}{map} = "      $s1\n$s2$s3\n$cb";
   $chars[$num] = $allchars{$id};
}

sub underacc {
   #
   # Dropped accents
   #
   my ($num, $char, $acc, $id) = @_;
   my ($h, $v, $s1, $s2, $s3);
   $acc =~ s/^under//;
   if ($acc eq "dot") { $acc = "period" }
   if ($acc eq "candrabindu") { die "Bad definition (no such accent): $qdef\n" }
   $allchars{$id}{wd} = $allchars{$char}{wd};
   $allchars{$id}{ht} = $allchars{$char}{ht};
   $allchars{$id}{ic} = $allchars{$char}{ic};
   $allchars{$id}{id} = $id;
   push( @{ $allchars{$id}{num} }, $num);
   if ($acc =~ /^$supacc$/) {
      $v = $allchars{$id}{dp} = $underadp * $scale * $shrink;
      $v += ($accdepth * $shrink);
   }
   else {
      $v = $allchars{$id}{dp} = $underddp * $scale + $allchars{$acc}{dp};
   }
   $s1 = $allchars{$char}{map};
   $s1 =~ s/\A      (.*)\n\Z/(PUSH) $1 (POP)/s;
   if ($acc =~ /^$supacc$/) {
      $h = ($allchars{$char}{wd} - ($allchars{$acc}{wd} * $shrink)) / 2 - $v * $slant;
      if ($opt_s) { $s2 = "      (SELECTFONT D 1)\n" }
   }
   else {
      $h = ($allchars{$char}{wd} - $allchars{$acc}{wd}) / 2 - $v * $slant;
   }
   $h = sprintf("%.3f", $h);
   if ($h > 0) {
      $s2 .= "      (MOVEDOWN R $v) (MOVERIGHT R $h) ";
   }
   elsif ($h < 0) {
      $h = -$h;
      $s2 .= "      (MOVEDOWN R $v) (MOVELEFT R $h) ";
   }
   else  { $s2 = "      (MOVEDOWN R $v) " }
   $s3 = $allchars{$acc}{map};
   $s3 =~ s/\A.*(\(SETCHAR . .+?\)).*\Z/$1/s;
   if ($opt_s and $acc =~ /^$supacc$/) { $s3 .= "\n      (SELECTFONT D 0)" }
   $allchars{$id}{map} = "      $s1\n$s2$s3\n";
   $chars[$num] = $allchars{$id};
}

sub underb {
   #
   # Underbar
   #
   my ($num, $char, $id) = @_;
   my ($h, $w, $dp, $s1, $s2, $s3);
   $allchars{$id}{wd} = $allchars{$char}{wd};
   $allchars{$id}{ht} = $allchars{$char}{ht};
   $allchars{$id}{dp} = $dp = $underbdp * $scale;
   $allchars{$id}{ic} = $allchars{$char}{ic};
   $allchars{$id}{id} = $id;
   push( @{ $allchars{$id}{num} }, $num);
   $s1 = $allchars{$char}{map};
   $s1 =~ s/\A      (.*)\n\Z/(PUSH) $1 (POP)/s;
   $h = sprintf("%.3f", ($allchars{$id}{wd} / 10 - $dp * $slant));
   $w = sprintf("%.3f", ($allchars{$id}{wd} * 8 / 10));
   if ($h > 0) {
      $s2 = "      (MOVEDOWN R $dp) (MOVERIGHT R $h) ";
   }
   elsif ($h < 0) {
      $h = -$h;
      $s2 = "      (MOVEDOWN R $dp) (MOVELEFT R $h) ";
   }
   else { $s2 .= "      (MOVEDOWN R $dp) " }
   $s3 = "(SETRULE R ${ \($thk * $scale) } R $w)";
   $allchars{$id}{map} = "      $s1\n$s2$s3\n";
   $chars[$num] = $allchars{$id};
}

sub digraph {
   #
   # Make a new character consisting of two existing characters
   #
   my ($num, $char, $acc, $id) = @_;
   my ($one, $two, $kern, $s1, $s2);
   if ($ligs =~ /^   \(LABEL $char\)\n(.*?)\n   \(KRN $acc R (-?[0-9.]+)\)\n/ms) {
      ($one, $two) = ($1, $2);
   }
   $kern = $two if $one !~ /^   \(STOP\)$/m;
   $allchars{$id}{wd} = $allchars{$char}{wd} + $allchars{$acc}{wd};
   $allchars{$id}{wd} += $kern;
   $allchars{$id}{ht} = max($allchars{$char}{ht}, $allchars{$acc}{ht});
   $allchars{$id}{dp} = max($allchars{$char}{dp}, $allchars{$acc}{dp});
   $allchars{$id}{ic} = $allchars{$acc}{ic};
   $allchars{$id}{id} = $id;
   push( @{ $allchars{$id}{num} }, $num);
   $s1 = $allchars{$char}{map};
   chomp ($s2 = $allchars{$acc}{map});
   $s1 =~ s/(\(SETCHAR .*?\))/$1\n$s2/;
   if ($kern) {
      if ($kern < 0) {
	 $kern = -$kern;
	 $s1 =~ s/(\(SETCHAR .*?\))/$1 (MOVELEFT R $kern)/;
      }
      else { $s1 =~ s/(\(SETCHAR .*?\))/$1 (MOVERIGHT R $kern)/ }
   }
   $allchars{$id}{map} = $s1;
   $chars[$num] = $allchars{$id};
}

sub fixkerns {
   #
   # Generalise the kerning info contained in the vpl file by applying
   # it to new accented chars. Do not kern lower-case chars bearing
   # superscript accents with capitals, quotes or a preceding "f".
   #
   my ($char, $acc) = @_;
   my ($olabel, $nlabel, @liglist, $lchar, $rchar);
   if ($acc =~ /^$accents$/) { $lchar = $rchar = $char }
   else {
      $lchar = $char;
      $rchar = $acc;
   }
   unless ($char =~ /^[a-z]/ and $acc =~ /^$supacc$/) {
      $ligs =~ s[(\n   \(LABEL )$rchar\)(?!\n   \(LIG.*$)]
		[$&$1$char$acc)]gm
                unless $ligs =~ /\n   \(LABEL $char$acc\)$/m;
      $ligs =~ s[(\n   \(LABEL )$rchar\)(\n   \(LIG.*$)+(?!\n   \(STOP\))]
		[$&$1$char$acc)]gm
                unless $ligs =~ /\n   \(LABEL $char$acc\)$/m;
      $ligs =~ s[(\n   \(KRN )$lchar( .*)$]
		[$&$1$char$acc$2]gm;
   }
   else {
      if ($ligs =~ /\n   \(LABEL $char\).*?\(STOP\)/s) {
	 $nlabel = $olabel = $&;
	 $nlabel =~ s/(\n   \(LABEL $char)\)/$1$acc)/
	   unless $ligs =~ /\n   \(LABEL $char$acc\)/m;
         $nlabel =~ s/\n   \(LIG .*\)$//gm;
         $nlabel =~ s/\n   \(LABEL (?!$char$acc).*\)$//gm;
	 $nlabel =~ s/\n   \(KRN ([A-Z]|quote).*\)$//gm;
	 $ligs =~ s/(\n   \(LABEL )$char\).*?\(STOP\)/$olabel$nlabel/s;
      }
      @liglist = split /\n   \(STOP\)/, $ligs;
      foreach (@liglist) {
	 unless (/\n   \(LABEL ([A-Zf]|quote).*\)$/m) {
	    s/(\n   \(KRN $char)( .*\))$/$&$1$acc$2/gm;
	 }
      }
      $ligs = join("\n   (STOP)", @liglist);
   }
}
